We want to fit JSDMS in French Guiana, one considering only bioclimatic variables, and the other one including species traits as additional explanatory variables.
# mail vendredi prévoir installer jSDM et gecevar
install.packages("jSDM")
# Rtools
devtools::install_github("ghislainv/gecevar")
# grass, gdal, osmctools
Forest inventories carried out in French Guiana, in the context of Guyafor project are available. We use these forest inventories to calculate a matrix indicating the presence by a \(1\) and the absence by a \(0\) of the species at each site by removing observations for which the species is not identified and rare species with less than 5 observations on all sites. This matrix therefore records the occurrences of \(290\) species at \(36\) sites.
library(readr)
## Guyafor
f <- "/home/clement/Documents/projet_METRADICA/data/Inventaires/202110_DonneesModeleMetradicaTout+Diam.csv"
df <- readr::read_delim(f, delim=";")
df <- df[df$Project=="Guyafor",]
nplots <- length(unique(df$idPlot))
# Remove indeterminate taxa
df <- df[-grep("Indet", df$Taxon), ]
# Remove taxa not defined at species but subspecies level
df <- df[-grep("subsp.", df$Taxon), ]
nsp <- length(unique(df$Taxon))
# forest inventory
trees <- df
# presence/absence of each species on each plot
nplot <- length(unique(trees$idPlot))
species <- unique(trees$Taxon)
nsp <- length(species)
# presence/absence of each species on each plot
PA <- matrix(0, nplot, nsp)
rownames(PA) <- paste(sort(as.numeric(unique(trees$idPlot))))
colnames(PA) <- unique(trees$Taxon)
for (i in 1:nplot){
for (j in 1:nsp){
idx <- which(trees$Taxon == species[j])
PA[paste(trees$idPlot[idx]),j] <- 1
}
}
# Remove very rare species
rare_species <- which(colSums(PA)<=5)
trees <- trees[!(trees$Taxon %in% names(rare_species)),]
PA <- PA[ ,which(colSums(PA)>5)]
nsp <- ncol(PA)
nplot <- nrow(PA)
species <- colnames(PA)
We represent the presence-absence matrix obtained for the first 20 species
f <- "/home/clement/Documents/projet_METRADICA/CWD-TWI-Marion/METRADICA.csv"
df <- readr::read_delim(f, delim=";")
# Remove species not recorded in Guyafor inventory
df <- df[df$Name %in% species,]
nsp <- length(unique(df$Name))
# Mean species traits
library(dplyr)
Tr <- df %>%
group_by(Name) %>%
summarise_at(vars(Gmin, FvFm, TLP, LA, SLA, LSWC, FW, SW, DW,
Midrib.VLA, X2VLA, X3VLA, MajVLA, MidribWidth,
SecundaryWidth),
mean, na.rm=TRUE)
Tr[,"Type"] <- c(unique(df[df$Name %in% Tr$Name, c("Name","Type")])[,"Type"])
Tr <- Tr[, c(1,17,2:16)]
We use the function get_chelsa_current from the R package gecevar to download a set of climatic data for French Guiana from the website: .
library(gecevar)
name <- "French Guiana"
epsg <- 2972
output_file <- "/home/clement/Documents/presentations/workshop-Metradica-March2023/jSDM_tutorial_cache"
setwd("/home/clement/Documents/presentations/workshop-Metradica-March2023/jSDM_tutorial_cache/")
# Get French Guiana extent in the specified coordinates system (EPSG)
output <- transform_shp_country_extent(EPSG = epsg,
country_name = name,
rm_download=FALSE)
save(output,
file="/home/clement/Documents/presentations/workshop-Metradica-March2023/jSDM_tutorial_cache/output.RData")
extent <- output[[1]][1]
extent_latlon <- as.numeric(output[[1]][2:5])
clim_path <- get_chelsa_current(extent_latlon = extent_latlon,
extent = extent,
EPSG = epsg,
destination = output_file,
resolution = 1000,
rm_download = TRUE)
| Variable | Unit |
|---|---|
| Minimum monthly temperatures | °C x 10 |
| Maximum monthly temperatures | °C x 10 |
| Average monthly temperatures | °C x 10 |
| Monthly precipitation | \(kg.m^{-2}.month^-1\) |
| Cloud cover | % |
| Climatic water deficit (Thornthwaite) | \(kg.m^{-2}\) |
| Potential evapotranspiration (Thornthwaite) | \(kg.m^{-2}.month^-1\) |
| Number of dry months (Thornthwaite) | months |
| Climatic water deficit (Penman-Monteith) | \(kg.m^{-2}\) |
| Potential evapotranspiration (Penman-Monteith) | \(kg.m^{-2}.month^-1\) |
| Number of dry months (Penman-Monteith) | months |
| Annual mean temperature (bio1 or temp) | °C x 10 |
| Diurnal temperature range (bio2) | °C x 10 |
| Isothermality (bio3=bio2/bio7) | °C x 10 |
| Seasonality of temperatures (bio4 or sais_temp) | °C x 10 |
| Maximum temperature of the warmest month (bio5) | °C x 10 |
| Minimum temperature of the coldest month (bio6) | °C x 10 |
| Annual temperature range (bio7=bio5-bio6) | °C x 10 |
| Average temperature of the wettest quarter (bio8) | °C x 10 |
| Average temperature of the driest quarter (bio9) | °C x 10 |
| Average temperature of the warmest quarter (bio10) | °C x 10 |
| Average temperature of the coldest quarter (bio11) | °C x 10 |
| Cumulative annual precipitation (bio12 or prec) | \(kg.m^{-2}.year^{-1}\) |
| Cumulative precipitation of wettest month (bio13) | \(kg.m^{-2}.month^{-1}\) |
| Cumulative precipitation of the driest month (bio14) | \(kg.m^{-2}.month^{-1}\) |
| Seasonality of rainfall (bio15 or sais_prec) | \(kg.m^{-2}\) |
| Precipitation in wettest quarter (bio16) | \(kg.m^{-2}.month^{-1}\) |
| Precipitation of driest quarter (bio17) | \(kg.m^{-2}.month^{-1}\) |
| Warmest quarter precipitation (bio18) | \(kg.m^{-2}.month^{-1}\) |
| Coldest quarter precipitation (bio19) | \(kg.m^{-2}.month^{-1}\) |
Among the climatic data downloaded, concerning the whole French Guiana territory at present (interpolations of representative observed data from the years 1960-1990). We choose to use the following variables because they have an ecological meaning which makes them easily interpretable and are little correlated between them according to the article Vieilledent et al. (2013) :
We consider also the quadratic effects of these climate variables to perform a quadratic regression, which is more suitable for fitting a niche model than a linear regression.
clim_path <- "/home/clement/Documents/presentations/workshop-Metradica-March2023/jSDM_tutorial_cache/data_raw/current_chelsa.tif"
load("/home/clement/Documents/presentations/workshop-Metradica-March2023/jSDM_tutorial_cache/output.RData"
)
# get interesting covariates among the climatic variables downloaded
clim_var <- terra::rast(clim_path,
lyrs=c("bio1", "bio4", "bio12", "bio15", "cwd_penman"))
names(clim_var) <- c("temp", "sais_temp", "prec", "sais_prec", "cwd")
proj <- terra::crs(clim_var)
# Data restricted to French Guiana's borders
borders <- terra::vect(output[[2]], layer="gadm36_GUF_0")
borders <- terra::project(borders, proj)
clim_var <- terra::mask(clim_var, borders)
# representation
par(oma=c(0,0,2,1))
terra::plot(clim_var, legend=TRUE)
title("Current bioclimatic variables", outer=TRUE, cex=0.8)
# spatial points of each plot
coords <- unique(cbind(as.numeric(trees$Xutm),
as.numeric(trees$Yutm),
as.numeric(trees$idPlot),
trees$SourceCoord, trees$UTMZone))
colnames(coords) <- c("Xutm","Yutm","plot", "sourceCoord", "UTMZone")
coords_plot <- coords[coords[,"sourceCoord"]=="plot",]
coords_tree <- data.frame(coords[coords[,"sourceCoord"]=="tree",])
coords_tree <- coords_tree[!(coords_tree[,"plot"] %in% coords_plot[,"plot"]), ]
sites <- unique(coords_tree[,"plot"])
coords <- coords_plot
for(i in 1:length(sites)){
mat <- coords_tree[coords_tree$plot==sites[i], ]
coords <- rbind(coords, mat[which.min(mat$Xutm),])
}
coords$plot <- as.numeric(coords$plot)
coords$Xutm <- as.numeric(coords$Xutm)
coords$Yutm <- as.numeric(coords$Yutm)
coords <- coords[sort(coords[,3], index.return=TRUE)$ix,]
# UTM22N coordinates system
xy <- terra::vect(coords[,1:2], crs=proj, geom=c("Xutm", "Yutm"))
# terra::writeVector(xy, overwrite=TRUE, "~/Documents/presentations/workshop-Metradica-March2023/jSDM_tutorial_cache/xy.shp")
# Add squared data
clim_var2 <- c(clim_var, clim_var^2)
names(clim_var2) <- c(names(clim_var), paste0(names(clim_var),
rep("2", dim(clim_var)[3])))
# extract climatic data on each plot
clim2 <- terra::extract(clim_var2, xy)
pos <- coords[,1:3]
colnames(pos) <- c("Xutm","Yutm","plot")
# Add squared data
data_clim2 <- data.frame(cbind(clim2[,-1], pos))
nparam <- ncol(data_clim2) -3
library(tidyverse)
# reduced centered data
scaled_data_clim2 <- scale(data_clim2[,1:nparam])
means <- attr(scaled_data_clim2,"scaled:center")
sds <- attr(scaled_data_clim2,"scaled:scale")
scaled_data_clim2 <- as_tibble(cbind(plot=data_clim2$plot,
scaled_data_clim2,
Xutm=data_clim2$Xutm,
Yutm=data_clim2$Yutm))
write.csv(scaled_data_clim2, file ="~/Documents/presentations/workshop-Metradica-March2023/jSDM_tutorial_files/scaled_clim.csv")
## Design matrix
X <- data.frame(intercept=rep(1,nplot),
dplyr::select(scaled_data_clim2,-Xutm,-Yutm, -plot))
np <- ncol(X)
# save raster in .tif format
# Center and reduce climatic variables
# using means and standard deviations of climatic variables at inventory sites
scaled_clim_var <- (clim_var2-means)/sds
names(scaled_clim_var) <- names(clim_var2)
terra::writeRaster(scaled_clim_var,
"~/Documents/presentations/workshop-Metradica-March2023/jSDM_tutorial_cache/scaled_clim.tif",
gdal=c("COMPRESS=LZW", "PREDICTOR=2"), overwrite=TRUE)
The values of these climate variables corresponding to the coordinates of the inventory plots are extracted and scaled to obtain the following data-set where coordinates of the sites will then be used for spatial interpolation and to spatially represent the results.
Referring to the models used in the articles Warton et al. (2015) and Albert & Siddhartha (1993), we define the following latent variable model (LVM) to account for species co-occurrence on all sites :
\[ \mathrm{probit}(\theta_{ij}) =\alpha_i + X_i.\beta_j+ W_i.\lambda_j \]
Link function probit: \(\mathrm{probit}: q \rightarrow \Phi^{-1}(q)\) where \(\Phi\) correspond to the distribution function of the reduced centered normal distribution.
Response variable: \(Y=(y_{ij})^{i=1,\ldots, n_{site}}_{j=1,\ldots,n_{species}}\) with:
\[y_{ij}=\begin{cases} 0 & \text{ if species $j$ is absent on the site $i$}\\ 1 & \text{ if species $j$ is present on the site $i$}. \end{cases}\]
\[y_{ij}=\begin{cases} 1 & \text{if} \ z_{ij} > 0 \\ 0 & \text{otherwise.} \end{cases}\]
It can be easily shown that: \(y_{ij} \sim \mathcal{B}ernoulli(\theta_{ij})\).
Explanatory variables:
\(\beta_j=(\beta_{j0}, \beta_{j1},\ldots,\beta_{jp})'\) are the intercept and regression coefficients corresponding to the bioclimatic or environmental variables for species \(j\) assumed to be fixed effects.
\(\alpha_i\) represents the random effect of site \(i\) such as \(\alpha_i \sim \mathcal{N}(0,V_{\alpha})\) and we assume that \(V_{\alpha} \sim \mathcal {IG}(\text{shape}=0.5, \text{rate}=0.0005)\) as prior distribution by default.
Latent variables: \(W_i=(W_{i1},\ldots,W_{iq})\) where \(q\) is the number of latent variables considered, which has to be fixed by the user (by default \(q=2\)). We assume that \(W_i \sim \mathcal{N}(0,I_q)\) and we define the associated coefficients \(\lambda_j=(\lambda_{j1},\ldots, \lambda_{jq})'\), also known as “factor loadings” (Warton et al. 2015). We use a prior distribution \(\mathcal{N}(0,10)\) for all lambda not concerned by constraints to \(0\) on upper diagonal and to strictly positive values on diagonal.
This model is equivalent to a multivariate GLMM \(\mathrm{g}(\theta_{ij}) = \alpha_i + X_i.\beta_j + u_{ij}\), where \(u_{ij} \sim \mathcal{N}(0, \Sigma)\) with the constraint that the variance-covariance matrix \(\Sigma = \Lambda \Lambda^{\prime}\), where \(\Lambda\) is the full matrix of factor loadings, with the \(\lambda_j\) as its columns.
We fit a binomial joint species distribution model, including two latent variables and random site effect using the jSDM_binomial_probit() function to perform binomial probit regression considering all the species from the data described above, by performing \(20 000\) iterations including \(10 000\) of burn-in and we retain \(N_{samp}=1 000\) values for each parameter of the model.
PA_noname <- PA
colnames(PA_noname) <- paste0("sp_", 1:ncol(PA))
# Fitting JSDM from French Guiana data set
T1<- Sys.time()
mod <- jSDM::jSDM_binomial_probit(
# Response variable
presence_data = PA_noname,
# Explanatory variables
site_formula = ~ temp + prec + sais_temp + sais_prec + cwd + temp2 + prec2 + sais_temp2 + sais_prec2 + cwd2,
site_data=scaled_data_clim2,
n_latent=2,
site_effect="random",
# Chains
burnin= 10000, mcmc=10000, thin=10,
# Starting values
alpha_start=0, beta_start=0,
lambda_start=0, W_start=0,
V_alpha=1,
# Priors
shape_Valpha=0.5,
rate_Valpha=0.0005,
mu_beta=0, V_beta=10,
mu_lambda=0, V_lambda=10,
# Various
seed=1234, verbose=1)
T2 <- Sys.time()
T_FG <- difftime(T2,T1)
save(mod, T_FG,
file="~/Documents/presentations/workshop-Metradica-March2023/jSDM_tutorial_cache/mod_FG.RData")
The MCMC algorithm is used to obtain draws from the posterior distribution of the parameters. We use as estimator for each parameter the mean of the \(N_{samp}\) values estimated in corresponding MCMC chain and we save the estimated parameters in .csv format.
load("~/Documents/presentations/workshop-Metradica-March2023/jSDM_tutorial_cache/mod_FG.RData")
species <- colnames(mod$model_spec$presence_data)
nplot <- nrow(mod$model_spec$presence_data)
nsp <- ncol(mod$model_spec$presence_data)
n_latent <- mod$model_spec$n_latent
np <- nrow(mod$model_spec$beta_start)
## Save parameters
### alphas
alphas <- apply(mod$mcmc.alpha,2,mean)
### V_alpha
V_alpha <- mean(mod$mcmc.V_alpha)
### latent variables
W1 <- colMeans(mod$mcmc.latent[["lv_1"]])
W2 <- colMeans(mod$mcmc.latent[["lv_2"]])
params_sites <- data.frame(plot = scaled_data_clim2$plot,
Xutm = scaled_data_clim2$Xutm,
Yutm = scaled_data_clim2$Yutm,
alphas, V_alpha = rep(V_alpha,nplot),W1,W2)
### fixed species effect lambdas and betas
lambdas <- matrix(0,nsp,n_latent)
betas <- matrix(0,nsp,np)
for (j in 1:nsp){
for (l in 1:n_latent){
lambdas[j,l] <- mean(mod$mcmc.sp[[j]][,np+l])
}
for (p in 1:np){
betas[j,p] <- mean(mod$mcmc.sp[[j]][,p])
}
}
colnames(betas) <- colnames(mod$mcmc.sp[[1]])[1:np]
params_species <- data.frame(species=species, Id_species = c(1:nsp),
betas, lambda_1 = lambdas[,1], lambda_2 = lambdas[,2])
We fit a binomial joint species distribution model, including two latent variables, 5 species traits and random site effect using the jSDM_binomial_probit() function to perform binomial probit regression considering all the species from the data described above, by performing \(20 000\) iterations including \(10 000\) of burn-in and we retain \(N_{samp}=1 000\) values for each parameter of the model.
PA_Tr <- PA[ ,colnames(PA) %in% Tr$Name]
colnames(PA_Tr) <- paste0("sp_", 1:ncol(PA_Tr))
write.csv(PA_Tr, file ="~/Documents/presentations/workshop-Metradica-March2023/jSDM_tutorial_files/PA_Tr.csv")
# Fitting JSDM from French Guiana datasets including traits
T1<- Sys.time()
mod_Tr <- jSDM::jSDM_binomial_probit(
# Response variable
presence_data = PA_Tr,
# Explanatory variables
site_formula = ~ temp + prec + sais_temp + sais_prec + cwd + temp2 + prec2 + sais_temp2 + sais_prec2 + cwd2,
site_data=scaled_data_clim2,
# Only one interaction
# trait_formula = ~ cwd:Gmin,
# All interactions
trait_formula = ~.,
trait_data = data.frame(scale(Tr[,c("Gmin", "FvFm", "TLP", "SLA")])),
n_latent=2,
site_effect="random",
# Chains
burnin= 10000, mcmc=10000, thin=10,
# Starting values
alpha_start=0, beta_start=0,
lambda_start=0, W_start=0,
V_alpha=1,
# Priors
shape_Valpha=0.5,
rate_Valpha=0.0005,
V_beta=10,
mu_lambda=0, V_lambda=10,
mu_gamma=0, V_gamma=10,
# Various
seed=1234, verbose=1)
T2 <- Sys.time()
T_FG_Tr <- difftime(T2,T1)
save(mod_Tr, T_FG_Tr,
file="~/Documents/presentations/workshop-Metradica-March2023/jSDM_tutorial_cache/mod_FG_Tr.RData")
The MCMC algorithm is used to obtain draws from the posterior distribution of the parameters. We use as estimator for each parameter the mean of the \(N_{samp}\) values estimated in corresponding MCMC chain
We visually evaluate the convergence of MCMCs by representing the trace and density a posteriori of the estimated parameters.
load("~/Documents/presentations/workshop-Metradica-March2023/jSDM_tutorial_cache/mod_FG.RData")
np <- nrow(mod$model_spec$beta_start)
species <- colnames(mod$model_spec$presence_data)
## alpha_i of the first site
par(mfrow=c(1,2),oma=c(1, 0, 1.4, 0))
coda::traceplot(mod$mcmc.alpha[,1],
main="Trace of alpha_1",
cex.main=1.6)
coda::densplot(mod$mcmc.alpha[,1],
main="Density of alpha_1",
cex.main=1.6)
abline(v=mean(mod$mcmc.alpha[,1]),col="blue")
title(main="Random site effect alpha_1",outer=T,cex.main=1.8)
## V_alpha
coda::traceplot(mod$mcmc.V_alpha,main="Trace of V_alpha",cex.main=1.6)
coda::densplot(mod$mcmc.V_alpha,main="Density of V_alpha", cex.main=1.6)
abline(v=mean(mod$mcmc.V_alpha),col="blue")
title(main=" Variance of random site effects",outer=T, cex.main=1.8)
## beta_j of the first two species
for (j in 1:2) {
par(mfrow=c(2,2),oma=c(1, 0, 1.4, 0))
for (p in 1:np) {
coda::traceplot(mod$mcmc.sp[[j]][,p],
main=paste("Trace of", colnames(mod$mcmc.sp[[j]])[p]),cex.main=1.3)
coda::densplot(mod$mcmc.sp[[j]][,p],
main=paste("Density of", colnames(mod$mcmc.sp[[j]])[p]),cex.main=1.3)
abline(v=mean(mod$mcmc.sp[[j]][,p]),col="blue")
if(p==1) title(main=paste("Fixed species effect beta for species", species[j]), outer=T, cex.main=1.6)
}
}
## lambda_j of the first two species
n_latent <- mod$model_spec$n_latent
par(mfrow=c(n_latent,2),oma=c(1, 0, 1.4, 0))
for (j in 1:2) {
for (l in 1:n_latent) {
coda::traceplot(mod$mcmc.sp[[j]][,np+l],
main = paste("Trace of", colnames(mod$mcmc.sp[[j]])[np+l]),cex.main=1.6)
coda::densplot(mod$mcmc.sp[[j]][,np+l],
main = paste("Density of", colnames(mod$mcmc.sp[[j]])[np+l]),cex.main=1.6)
abline(v=mean(mod$mcmc.sp[[j]][,np+l]),col="blue")
}
title(main=paste("Factor loadings lambda for species ", species[j]),outer=T,cex.main=1.8)
}
## Latent variables W_i for one site
par(mfrow=c(2,2),oma=c(1, 0, 1.4, 0))
for (l in 1:n_latent) {
coda::traceplot(mod$mcmc.latent[[paste0("lv_",l)]][,1],
main = paste0(" Trace of W_1", l),cex.main=1.6)
coda::densplot(mod$mcmc.latent[[paste0("lv_",l)]][,1],
main = paste0(" Density of W_1", l),cex.main=1.6)
abline(v=mean(mod$mcmc.latent[[paste0("lv_",l)]][,1]),col="blue")
}
title(main="Latentes variables W_1", outer=T,cex.main=1.8)
## Deviance
par(mfrow=c(1,2),oma=c(1, 0, 1.5, 0))
coda::traceplot(mod$mcmc.Deviance,main="Trace",cex.main=1.6)
coda::densplot(mod$mcmc.Deviance,main="Density",cex.main=1.6)
abline(v=mean(mod$mcmc.Deviance),col="blue")
title(main = "Deviance",outer=T,cex.main=1.8)
# theta
par(mfrow=c(1,2))
hist(mod$probit_theta_latent,col="light blue", main = "probit(theta) estimated")
hist(mod$theta_latent, breaks=pretty, col="light blue", main = "theta estimated")
Overall, the traces and densities of the parameters indicate the convergence of the algorithm. Indeed, we observe on the traces that the values oscillate around averages without showing any upward or downward trend and we see that the densities are quite smooth and for most of them of Gaussian form.
We visually evaluate the convergence of MCMCs by representing the trace and density a posteriori of the estimated parameters.
load("~/Documents/presentations/workshop-Metradica-March2023/jSDM_tutorial_cache/mod_FG_Tr.RData")
np <- nrow(mod_Tr$model_spec$beta_start)
nt <- nrow(mod_Tr$model_spec$gamma_start)
species <- colnames(mod_Tr$model_spec$presence_data)
## boxlot of gamma corresponding to each covariable
par(mfrow=c(2,1))
for (p in 1:np){
gamma_p <- mod_Tr$mcmc.gamma[[p]]
colnames(gamma_p) <- c("gamma_Int", colnames(gamma_p)[-1])
boxplot.matrix(gamma_p,
main=names(mod_Tr$mcmc.gamma)[p])
}
# # create data for segments
# n <- ncol(mod$gamma)
# # width of each boxplot is 0.8
# x0s <- 1:n - 0.4
# x1s <- 1:n + 0.4
# # these are the y-coordinates for the horizontal lines
# # that you need to set to the desired values.
# y0s <- mean(mod_Tr$mcmc.gamma[[p]])
# # add segments
# segments(x0 = x0s, x1 = x1s, y0 = y0s, col = "red", lwd=2)
par(mfrow=c(3,2), oma=c(0,0,2,0))
for (p in 1:np){
for (t in 1:nt){
coda::traceplot(mod_Tr$mcmc.gamma[[p]][,t],
main=paste0("Trace of ", colnames(mod_Tr$mcmc.gamma[[p]])[t], ".",
names(mod_Tr$mcmc.gamma)[p]), cex.main=1.5)
coda::densplot(mod_Tr$mcmc.gamma[[p]][,t],
main=paste0("Density of ", colnames(mod_Tr$mcmc.gamma[[p]])[t], ".",
names(mod_Tr$mcmc.gamma)[p]), cex.main=1.5)
abline(v=mean(mod_Tr$mcmc.gamma[[p]][,t]),col="blue")
}
}
## alpha_i of the first site
par(mfrow=c(1,2),oma=c(1, 0, 1.4, 0))
coda::traceplot(mod_Tr$mcmc.alpha[,1],
main="Trace of alpha_1",
cex.main=1.6)
coda::densplot(mod_Tr$mcmc.alpha[,1],
main="Density of alpha_1",
cex.main=1.6)
abline(v=mean(mod_Tr$mcmc.alpha[,1]),col="blue")
title(main="Random site effect alpha_1",outer=T,cex.main=1.8)
## V_alpha
coda::traceplot(mod_Tr$mcmc.V_alpha,main="Trace of V_alpha",cex.main=1.6)
coda::densplot(mod_Tr$mcmc.V_alpha,main="Density of V_alpha", cex.main=1.6)
abline(v=mean(mod_Tr$mcmc.V_alpha),col="blue")
title(main=" Variance of random site effects",outer=T, cex.main=1.8)
## beta_j of the first two species
for (j in 1:2) {
par(mfrow=c(2,2),oma=c(1, 0, 1.4, 0))
for (p in 1:np) {
coda::traceplot(mod_Tr$mcmc.sp[[j]][,p],
main=paste("Trace of", colnames(mod_Tr$mcmc.sp[[j]])[p]),cex.main=1.3)
coda::densplot(mod_Tr$mcmc.sp[[j]][,p],
main=paste("Density of", colnames(mod_Tr$mcmc.sp[[j]])[p]),cex.main=1.3)
abline(v=mean(mod_Tr$mcmc.sp[[j]][,p]),col="blue")
if(p==1) title(main=paste("Fixed species effect beta for species", species[j]), outer=T, cex.main=1.6)
}
}
## lambda_j of the first two species
n_latent <- mod_Tr$model_spec$n_latent
par(mfrow=c(n_latent,2),oma=c(1, 0, 1.4, 0))
for (j in 1:2) {
for (l in 1:n_latent) {
coda::traceplot(mod_Tr$mcmc.sp[[j]][,np+l],
main = paste("Trace of", colnames(mod_Tr$mcmc.sp[[j]])[np+l]),cex.main=1.6)
coda::densplot(mod_Tr$mcmc.sp[[j]][,np+l],
main = paste("Density of", colnames(mod_Tr$mcmc.sp[[j]])[np+l]),cex.main=1.6)
abline(v=mean(mod_Tr$mcmc.sp[[j]][,np+l]),col="blue")
}
title(main=paste("Factor loadings lambda for species ", species[j]),outer=T,cex.main=1.8)
}
## Latent variables W_i for one site
par(mfrow=c(2,2),oma=c(1, 0, 1.4, 0))
for (l in 1:n_latent) {
coda::traceplot(mod_Tr$mcmc.latent[[paste0("lv_",l)]][,1],
main = paste0(" Trace of W_1", l),cex.main=1.6)
coda::densplot(mod_Tr$mcmc.latent[[paste0("lv_",l)]][,1],
main = paste0(" Density of W_1", l),cex.main=1.6)
abline(v=mean(mod_Tr$mcmc.latent[[paste0("lv_",l)]][,1]),col="blue")
}
title(main="Latentes variables W_1", outer=T, cex.main=1.8)
## Deviance
par(mfrow=c(1,2),oma=c(1, 0, 1.5, 0))
coda::traceplot(mod_Tr$mcmc.Deviance,main="Trace",cex.main=1.6)
coda::densplot(mod_Tr$mcmc.Deviance,main="Density",cex.main=1.6)
abline(v=mean(mod_Tr$mcmc.Deviance),col="blue")
title(main = "Deviance",outer=T, cex.main=1.8)
# theta
par(mfrow=c(1,2))
hist(mod_Tr$probit_theta_latent,col="light blue", main = "probit(theta) estimated")
hist(mod_Tr$theta_latent, breaks=pretty, col="light blue", main = "theta estimated")
Overall, the traces and densities of the parameters indicate the convergence of the algorithm. Indeed, we observe on the traces that the values oscillate around averages without showing any upward or downward trend and we see that the densities are quite smooth and for most of them of Gaussian form.
load("/home/clement/Documents/presentations/workshop-Metradica-March2023/jSDM_tutorial_cache/output.RData")
# French Guiana borders
FG_borders <- terra::vect(output[[2]], layer = "gadm36_GUF_0")
FG_borders <- terra::project(FG_borders, terra::crs(xy))
# Observed presence absence
id_pres <- which(PA[, 264]==1)
id_abs <- which(PA[, 264]==0)
obs_pres <- terra::vect(terra::crds(xy[id_pres,]),
type="point", crs=terra::crs(xy))
obs_abs <- terra::vect(terra::crds(xy[id_abs,]),
type="point", crs=terra::crs(xy))
# Representation
par(mfrow=c(1,1))
terra::plot(FG_borders, col="lightgray",
main="Observed occurences of Tabernaemontana undulata",
cex.main=1.4)
terra::points(obs_pres, pch=16, cex=1.3)
terra::points(obs_abs, pch=1, cex=1.3)
legend("right", legend=c("presence","absence"), pch=c(16,1))
# Estimated probabilities of presence
theta_latent_sp <- terra::vect(data.frame(terra::crds(xy), mod$theta_latent),
crs=terra::crs(xy), geom=c("x","y"))
# Representation
# define groups for mapping
cuts <- c(0,0.1,0.2,0.3,0.4,0.5,0.7,0.9,1)
col <- c('red3','red','orange','yellow', 'yellow green',
'green3', 'forest green','dark green')
terra::plot(FG_borders, col="lightgray",
main ="Estimated probabilities of presence for Tabernaemontana undulata",
cex.main=1.4)
terra::plot(theta_latent_sp, y=264, pch=20, cex=1.3,
legend="topright", plg=list(cex=1.01), add=TRUE, col=col,
type="interval", breaks=cuts)
It can be observed that the inventory sites where this species was observed match those for which a high probability of presence was estimated by the JSDM.
load("/home/clement/Documents/presentations/workshop-Metradica-March2023/jSDM_tutorial_cache/output.RData")
PA_Tr <- PA[ ,colnames(PA) %in% Tr$Name]
colnames(PA_Tr) <- paste0("sp_", 1:ncol(PA_Tr))
# French Guiana borders
FG_borders <- terra::vect(output[[2]], layer = "gadm36_GUF_0")
FG_borders <- terra::project(FG_borders, terra::crs(xy))
# Observed presence absence
id_pres <- which(PA_Tr[, 1]==1)
id_abs <- which(PA_Tr[, 1]==0)
obs_pres <- terra::vect(terra::crds(xy[id_pres,]),
type="point", crs=terra::crs(xy))
obs_abs <- terra::vect(terra::crds(xy[id_abs,]),
type="point", crs=terra::crs(xy))
# Representation
par(mfrow=c(1,1))
terra::plot(FG_borders, col="lightgray",
main="Observed occurences of Iryanthera sagotiana",
cex.main=1.4)
terra::points(obs_pres, pch=16, cex=1.3)
terra::points(obs_abs, pch=1, cex=1.3)
legend("right", legend=c("presence","absence"), pch=c(16,1))
# Estimated probabilities of presence
theta_latent_sp <- terra::vect(data.frame(terra::crds(xy), mod_Tr$theta_latent),
crs=terra::crs(xy), geom=c("x","y"))
# Representation
# define groups for mapping
cuts <- c(0,0.1,0.2,0.3,0.5,0.7,0.8,0.9,1)
col <- c('red3','red','orange','yellow', 'yellow green',
'green3', 'forest green','dark green')
terra::plot(FG_borders, col="lightgray",
main ="Estimated probabilities of presence for Iryanthera sagotiana",
cex.main=1.4)
terra::plot(theta_latent_sp, y=1, pch=20, cex=1.3,
legend="topright", plg=list(cex=1.01), add=TRUE, col=col,
type="interval", breaks=cuts)
It can be observed that the inventory sites where this species was observed match those for which a high probability of presence was estimated by the JSDM.
Species richness also called diversity \(\alpha\) reflects the number of species coexisting in a given environment and is computed by summing the number of species present on each site.
We spatially represent the species richness observed at each inventory site defined by \(R_i=\sum\limits_ {j=1}^{n_{species}} y_{ij}\) and the species richness estimated, following Scherrer, Mod & Guisan (2020), by summing the estimated occurrence probabilities of all species on each site: \(\widehat{R}_i=\sum\limits_ {j=1}^{n_{species}} \widehat{\theta}_{ij}\).
# French Guiana borders
FG_borders <- terra::vect(output[[2]], layer = "gadm36_GUF_0")
FG_borders <- terra::project(FG_borders, terra::crs(xy))
# Representation of observed species richness
species_richness_obs <- data.frame(species_richness_obs=rowSums(PA))
species_richness_obs_sp <- terra::vect(cbind(terra::crds(xy),
species_richness_obs),
geom=c("x","y"), crs=terra::crs(xy))
par(mfrow=c(1,1))
cuts <- c(0,40,80,100, 120, 140, 160, 200, ncol(mod$theta_latent))
col <- c('red4','red','orange','yellow',
'yellow green', 'green3',
'forest green','dark green')
terra::plot(FG_borders, col="lightgray",
main ="Observed species richness",
cex.main=1.4)
terra::plot(species_richness_obs_sp, 'species_richness_obs',
breaks=cuts, col=col,
pch=20, cex=1.2, add=TRUE,
legend="topright", plg=list(cex=1.01), type="interval")
# Representation of estimated species richness
par(mfrow=c(1,1))
species_richness <- data.frame(species_richness= apply(mod$theta_latent,1,sum))
species_richness_sp <- terra::vect(cbind(terra::crds(xy),
species_richness),
geom=c("x","y"), crs=terra::crs(xy))
terra::plot(FG_borders, col="lightgray",
main ="Estimated species richness",
cex.main=1.4)
terra::plot(species_richness_sp,
'species_richness', col=col,
breaks=cuts, pch=20, cex=1.2, add=TRUE,
legend="topright", plg=list(cex=1.01),
type="interval")
# Comparison between observed and estimated species richness at inventory sites
plot(species_richness_obs$species_richness_obs,
species_richness$species_richness,
main="Species richness", xlab="observed",
ylab="estimated",
cex.main=1.4, cex.lab=1.3)
abline(a=0,b=1,col='red')
It can be seen that the observed species richness on the inventory sites corresponds fairly well to that estimated by summing the estimated probabilities of occurrence of all species on each site.
PA_Tr <- PA[ ,colnames(PA) %in% Tr$Name]
# French Guiana borders
FG_borders <- terra::vect(output[[2]], layer = "gadm36_GUF_0")
FG_borders <- terra::project(FG_borders, terra::crs(xy))
# Representation of observed species richness
species_richness_obs <- data.frame(species_richness_obs=rowSums(PA_Tr))
species_richness_obs_sp <- terra::vect(cbind(terra::crds(xy),
species_richness_obs),
geom=c("x","y"), crs=terra::crs(xy))
par(mfrow=c(1,1))
cuts <- c(0, 4, 6, 8, 10, 12, 16, 20, ncol(mod_Tr$theta_latent))
col <- c('red4','red','orange','yellow',
'yellow green', 'green3',
'forest green','dark green')
terra::plot(FG_borders, col="lightgray",
main ="Observed species richness",
cex.main=1.4)
terra::plot(species_richness_obs_sp, 'species_richness_obs',
breaks=cuts, col=col,
pch=20, cex=1.2, add=TRUE,
legend="topright", plg=list(cex=1.01), type="interval")
# Representation of estimated species richness
par(mfrow=c(1,1))
species_richness <- data.frame(species_richness= apply(mod_Tr$theta_latent,1,sum))
species_richness_sp <- terra::vect(cbind(terra::crds(xy),
species_richness),
geom=c("x","y"), crs=terra::crs(xy))
terra::plot(FG_borders, col="lightgray",
main ="Estimated species richness",
cex.main=1.4)
terra::plot(species_richness_sp,
'species_richness', col=col,
breaks=cuts, pch=20, cex=1.3, add=TRUE,
legend="topright", plg=list(cex=1.01),
type="interval")
# Comparison between observed and estimated species richness at inventory sites
plot(species_richness_obs$species_richness_obs,
species_richness$species_richness,
main="Species richness", xlab="observed",
ylab="estimated",
cex.main=1.4, cex.lab=1.3)
abline(a=0,b=1,col='red')
It can be seen that the observed species richness on the inventory sites corresponds fairly well to that estimated by summing the estimated probabilities of occurrence of all species on each site.
After fitting the JSDM with latent variables, the full species residual correlation matrix \(R=(R_{ij})^{i=1,\ldots, n_{site}}_{j=1,\ldots, n_{species}}\) can be derived from the covariance in the latent variables such as : \[\Sigma_{ij} = \lambda_i^T .\lambda_j \], then we compute correlations from covariances : \[R_{i,j} = \frac{\Sigma_{ij}}{\sqrt{\Sigma _{ii}\Sigma _{jj}}}\].
n.species <- ncol(mod$model_spec$presence_data)
n.mcmc <- nrow(mod$mcmc.latent[[1]])
Tau.cor.arr <- matrix(NA,n.mcmc,n.species^2)
for(t in 1:n.mcmc) {
lv.coefs <- t(sapply(mod$mcmc.sp, "[", t, grep("lambda",colnames(mod$mcmc.sp[[1]]))))
Tau.mat <- lv.coefs %*% t(lv.coefs)
Tau.cor.mat <- cov2cor(Tau.mat)
Tau.cor.arr[t,] <- as.vector(Tau.cor.mat)
}
## Average over the MCMC samples
R <- matrix(apply(Tau.cor.arr,2,mean),n.species,byrow=F)
write.csv(R, row.names = F,
file ="~/Documents/presentations/workshop-Metradica-March2023/jSDM_tutorial_cache/R.csv")
We represent the full residual correlation matrix for the fifty more abundant species and the residual correlation matrix with only “significant” correlations, whose $ 95 %$ HPD interval over the MCMC samples does not contain zero.
mod_50 <- jSDM::jSDM_binomial_probit(
# Response variable
presence_data = PA[ ,order(colSums(PA),
decreasing = TRUE)[1:50]],
# Explanatory variables
site_formula = ~ temp + prec + sais_temp + sais_prec + cwd + temp2 + prec2 + sais_temp2 + sais_prec2 + cwd2,
site_data=scaled_data_clim2,
n_latent=2,
site_effect="random",
# Chains
burnin= 5000, mcmc=10000, thin=10,
# Starting values
alpha_start=0, beta_start=0,
lambda_start=0, W_start=0,
V_alpha=1,
# Priors
shape_Valpha=0.1,
rate_Valpha=0.1,
mu_beta=0, V_beta=c(10,rep(1,np-1)),
mu_lambda=0, V_lambda=1,
# Various
seed=1234, verbose=1)
# Plot residual correlation matrix
jSDM::plot_residual_cor(mod_50, tl.cex = 0.6)
jSDM::plot_residual_cor(mod_50, prob=0.95, tl.cex = 0.6)
This representation of associations between species allows us to observe the positive or negative correlations between species which can be interpreted in terms of positive or negative influence of the presence of a species on the probability of occurrence of another.
We represent the full residual correlation matrix for all species and the residual correlation matrix with only “significant” correlations, whose $ 95 %$ HPD interval over the MCMC samples does not contain zero.
# Plot residual correlation matrix
jSDM::plot_residual_cor(mod_Tr, tl.cex = 0.8)
jSDM::plot_residual_cor(mod_Tr, prob=0.95, tl.cex = 0.8)